import pandas as pd
import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud

# Load the policy table, then print a quick preview: the leading rows as a
# DataFrame, the per-column count() summary, and the first five rows as
# plain Python lists.
policy = pd.read_csv("policy/policy.csv")
for summary in (policy.head(), policy.count()):
    print(summary)
for record in policy.head(5).values.tolist():
    print(record)


# Strip stop words
def remove_stop_words(f):
    """Return ``f`` with every stop-word substring deleted.

    The stop words are removed one after another, in the order listed, using
    plain substring replacement: matching is case-sensitive and is not
    restricted to whole words, so a stop word embedded in a longer word is
    removed as well.
    """
    cleaned = f
    for unwanted in (
        '通知', '关于', '的通知', '的', '和', '重庆市', '重点',
        'Vocal', '弦乐', 'Keyboard', '键盘', '编辑', '助理',
        'Assistants', 'Mixing', 'Editing', 'Recording',
        '音乐', '制作', 'Producer', '发行', 'produced', 'and',
        'distributed',
    ):
        cleaned = cleaned.replace(unwanted, '')
    return cleaned


# Generate the word cloud
def create_word_cloud(f, *, font_path="./wc.ttf",
                      output_path="word_cloud_url.jpg",
                      max_words=100, width=2000, height=1200):
    """Build a word cloud from the text ``f``, save it and display it.

    The text is first passed through ``remove_stop_words``, then segmented
    with ``jieba.cut`` and re-joined with single spaces so that ``WordCloud``
    can split it into words. The rendered image is written to
    ``output_path`` and then shown in a matplotlib window.

    Args:
        f: Raw text to visualise.
        font_path: Font file handed to ``WordCloud``.
        output_path: File the rendered image is written to.
        max_words: Upper bound on the number of words ``WordCloud`` draws.
        width: Canvas width passed to ``WordCloud``.
        height: Canvas height passed to ``WordCloud``.

    The keyword-only parameters default to the values that were previously
    hard-coded, so ``create_word_cloud(text)`` behaves as before.
    """
    print('根据词频，开始生成词云!')
    cleaned = remove_stop_words(f)
    cut_text = " ".join(jieba.cut(cleaned, cut_all=False, HMM=True))
    wc = WordCloud(
        font_path=font_path,
        max_words=max_words,
        width=width,
        height=height,
    )
    print(cut_text)
    wordcloud = wc.generate(cut_text)
    # Write the word-cloud image to disk
    wordcloud.to_file(output_path)
    # Display the word cloud without axis ticks
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()


# Concatenate the text stored in column position 17 of every row and build the
# word cloud from it. Empty CSV cells are read as float NaN, which cannot be
# concatenated to a str, so missing values are dropped and the remaining cells
# are converted to text. A single join also avoids re-copying the growing
# string on every row.
all_word = "".join(policy.iloc[:, 17].dropna().astype(str))
create_word_cloud(all_word)
